#ifndef __DICTPRODUCER_H__
#define __DICTPRODUCER_H__

#include "SplitToolCppJieba.h"
#include "SplitTool.h"
#include "Configuration.h"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <utility>
#include <map>
#include <set>

using std::cout;
using std::set;
using std::map;
using std::pair;
using std::vector;
using std::string;
using std::ofstream;

// Standalone dictionary builder: takes corpus files as input and produces a
// dictionary file and a dictionary index file.
class DictProducer
{
public :
    DictProducer();
    // NOTE(review): the string parameter is unnamed in this header; presumably
    // a corpus/configuration path -- confirm against the implementation.
    // NOTE(review): this single-argument constructor is not `explicit`, so a
    // string converts implicitly to DictProducer; confirm that is intended.
    DictProducer(const string&);
    // Same string argument plus the word-segmentation tool to use.
    // NOTE(review): ownership of the SplitTool pointer is not visible here;
    // no destructor is declared in this class -- confirm who frees the tool.
    DictProducer(const string&, SplitTool*);
    void buildEnDict(); // build the English dictionary
    void buildCnDict(); // build the Chinese dictionary
    void createIndex(); // build the index
    void store();       // presumably writes the dictionary/index out -- confirm in the .cpp
private :
    set<string> _EnFiles; // English corpus files
    set<string> _CnFiles; // Chinese corpus files
    vector<pair<string, int>> _EnDict; // English dictionary; presumably (word, frequency) -- TODO confirm
    vector<pair<string, int>> _CnDict; // Chinese dictionary; presumably (word, frequency) -- TODO confirm
    map<string, set<int>> _index; // index; presumably key -> positions in a dictionary vector -- TODO confirm
    // Defaults to nullptr so constructors that receive no SplitTool never
    // leave an indeterminate pointer behind.
    SplitTool* _cuttor = nullptr;
};
#endif
